from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LAParams
from pdfminer.pdfparser import PDFParser, PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfdevice import PDFDevice

# Extract the text of ./test_data/22.pdf and append it to ./test_data/22.txt.
#
# Both files are managed by a single ``with`` statement so the handles are
# closed -- and the buffered output is flushed to disk -- even when parsing
# raises part-way through the document.
# NOTE: the output is opened in append mode, so running the script again adds
# the same text to the end of the existing file instead of replacing it.
with open("./test_data/22.pdf", "rb") as fp, \
        open("./test_data/22.txt", "a", encoding='utf-8') as fw:
    # The parser and the document hold references to each other, so the link
    # has to be established in both directions before initialising.
    parser = PDFParser(fp)
    doc = PDFDocument()
    parser.set_document(doc)
    doc.set_parser(parser)

    # Empty string argument -- presumably the (absent) document password;
    # verify against the installed pdfminer version.
    doc.initialize("")

    # One resource manager is shared by the device and the interpreter.
    resource = PDFResourceManager()
    laparam = LAParams()
    device = PDFPageAggregator(resource, laparams=laparam)
    interpreter = PDFPageInterpreter(resource, device)

    for page in doc.get_pages():
        interpreter.process_page(page)

        # Fetch the aggregated result from the device after each processed
        # page and walk its child objects.
        layout = device.get_result()

        for out in layout:
            # Only objects exposing get_text() contribute to the output;
            # everything else in the layout is skipped.
            if hasattr(out, "get_text"):
                fw.write(out.get_text())

print('Done')